/*
 * Fast, but crude, pairwise structural Alignments of RNA sequences
 *
 * Possible structures of each RNA are encoded in a linear
 * "probability profile", by computing for each base the probability
 * of being unpaired, or paired upstream or downstream. These profiles
 * can be aligned using standard string alignment.
 *
 * The is an extension of the old method in ProfileDist.c with the
 * following changes:
 * - use sequence as well as structure profile for scoring
 * - use similarity alignment instead of distance (maybe add local alinment)
 * - use affine gap costs
 *
 *  C Ivo L Hofacker, Vienna RNA Package
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <float.h>
#include "ViennaRNA/dist_vars.h"
#include "ViennaRNA/fold_vars.h"
#include "ViennaRNA/partfunc/global.h"
#include "ViennaRNA/utils/basic.h"
#include "ViennaRNA/utils/log.h"
#include "ViennaRNA/profiledist.h"
#include "ViennaRNA/ProfileAln.h"


#define EQUAL(x, y)     (fabs((x) - (y)) <= fabs(x) * 2 * FLT_EPSILON)

PRIVATE int *alignment[2];

PRIVATE void
sprint_aligned_bppm(const float *T1,
                    const char  *seq1,
                    const float *T2,
                    const char  *seq2);


PRIVATE double
PrfEditScore(const float  *p1,
             const float  *p2,
             char         c1,
             char         c2);


PRIVATE double
average(double  x,
        double  y);


PRIVATE double  open = -1.5, ext = -0.666;  /* defaults from clustalw */
PRIVATE double  seqw      = 0.5;
PRIVATE int     free_ends = 1;              /* whether to use free end gaps */

/*---------------------------------------------------------------------------*/

PRIVATE float **
newmat(int  l1,
       int  l2)
{
  float **a;
  int   i;

  a = (float **)vrna_alloc((l1 + 1) * sizeof(float *));
  for (i = 0; i <= l1; i++)
    a[i] = (float *)vrna_alloc((l2 + 1) * sizeof(float));
  return a;
}


PUBLIC float
profile_aln(const float *T1,
            const char  *seq1,
            const float *T2,
            const char  *seq2)
{
  /* align the 2 probability profiles T1, T2 */
  /* This is like a Needleman-Wunsch alignment, with affine gap-costs
   * ala Gotoh. The score looks at both seq and pair profile */

  float **S, **E, **F, tot_score;
  int   i, j, length1, length2;

  length1 = strlen(seq1);
  length2 = strlen(seq2);
  S       = newmat(length1, length2);
  E       = newmat(length1, length2);
  F       = newmat(length1, length2);

  E[0][0]   = F[0][0] = open - ext;
  S[0][0]   = 0;
  tot_score = -9999.;

  for (i = 1; i <= length1; i++)
    F[i][0] = -9999;                          /* impossible */
  for (j = 1; j <= length2; j++)
    E[0][j] = -9999;                          /* impossible */
  if (!free_ends) {
    for (i = 1; i <= length1; i++)
      S[i][0] = E[i][0] = E[i - 1][0] + ext;
    for (j = 1; j <= length2; j++)
      S[0][j] = F[0][j] = F[0][j - 1] + ext;
  }

  for (i = 1; i <= length1; i++) {
    for (j = 1; j <= length2; j++) {
      float M;
      E[i][j] = MAX2(E[i - 1][j] + ext, S[i - 1][j] + open);
      F[i][j] = MAX2(F[i][j - 1] + ext, S[i][j - 1] + open);
      M       = S[i - 1][j - 1] + PrfEditScore(T1 + 3 * i, T2 + 3 * j, seq1[i - 1], seq2[j - 1]);
      S[i][j] = MAX3(M, E[i][j], F[i][j]);
    }
  }

  if (edit_backtrack) {
    double  score = 0;
    char    state = 'S';
    int     pos, i, j;
    alignment[0]  = (int *)vrna_alloc((length1 + length2 + 1) * sizeof(int));
    alignment[1]  = (int *)vrna_alloc((length1 + length2 + 1) * sizeof(int));

    pos = length1 + length2;
    i   = length1;
    j   = length2;

    tot_score = S[length1][length2];

    if (free_ends) {
      /* find starting point for backtracking,
       * search for highest entry in last row or column */
      int imax = 0;
      for (i = 1; i <= length1; i++) {
        if (S[i][length2] > score) {
          score = S[i][length2];
          imax  = i;
        }
      }
      for (j = 1; j <= length2; j++) {
        if (S[length1][j] > score) {
          score = S[length1][j];
          imax  = -j;
        }
      }
      if (imax < 0) {
        for (j = length2; j > -imax; j--) {
          alignment[0][pos]   = 0;
          alignment[1][pos--] = j;
        }
        i = length1;
      } else {
        for (i = length1; i > imax; i--) {
          alignment[0][pos]   = i;
          alignment[1][pos--] = 0;
        }
        j = length2;
      }

      tot_score = score;
    }

    while (i > 0 && j > 0) {
      switch (state) {
        case 'E':
          score               = E[i][j];
          alignment[0][pos]   = i;
          alignment[1][pos--] = 0;
          if (EQUAL(score, S[i - 1][j] + open))
            state = 'S';

          i--;
          break;
        case 'F':
          score               = F[i][j];
          alignment[0][pos]   = 0;
          alignment[1][pos--] = j;
          if (EQUAL(score, S[i][j - 1] + open))
            state = 'S';

          j--;
          break;
        case 'S':
          score = S[i][j];
          if (EQUAL(score, E[i][j])) {
            state = 'E';
          } else if (EQUAL(score, F[i][j])) {
            state = 'F';
          } else if (EQUAL(score, S[i - 1][j - 1] +
                           PrfEditScore(T1 + 3 * i, T2 + 3 * j, seq1[i - 1], seq2[j - 1]))) {
            alignment[0][pos]   = i;
            alignment[1][pos--] = j;
            i--;
            j--;
          } else {
            vrna_log_error("backtrack of alignment failed");
            free(alignment[0]);
            free(alignment[1]);
            for (i = 0; i <= length1; i++) {
              free(S[i]);
              free(E[i]);
              free(F[i]);
            }
            free(S);
            free(E);
            free(F);

            return (float)INF;
          }

          break;
      }
    }

    for (; j > 0; j--) {
      alignment[0][pos]   = 0;
      alignment[1][pos--] = j;
    }
    for (; i > 0; i--) {
      alignment[0][pos]   = i;
      alignment[1][pos--] = 0;
    }

    for (i = pos + 1; i <= length1 + length2; i++) {
      alignment[0][i - pos] = alignment[0][i];
      alignment[1][i - pos] = alignment[1][i];
    }
    alignment[0][0] = length1 + length2 - pos;   /* length of alignment */

    sprint_aligned_bppm(T1, seq1, T2, seq2);
    free(alignment[0]);
    free(alignment[1]);
  }

  for (i = 0; i <= length1; i++) {
    free(S[i]);
    free(E[i]);
    free(F[i]);
  }
  free(S);
  free(E);
  free(F);

  return tot_score;
}


/*---------------------------------------------------------------------------*/
PRIVATE inline double
average(double  x,
        double  y)
{
  /*
   * As in Bonhoeffer et al (1993) 'RNA Multi Structure Landscapes',
   * Eur. Biophys. J. 22: 13-24 we have chosen  the geometric mean.
   */
  return (float)sqrt(x * y);
}


PRIVATE double
PrfEditScore(const float  *p1,
             const float  *p2,
             char         c1,
             char         c2)
{
  double  score;
  int     k;

  for (score = 0., k = 0; k < 3; k++)
    score += average(p1[k], p2[k]);

  score *= (1 - seqw);
  if (c1 == c2)
    score += seqw;
  else if (((c1 == 'A') && (c2 == 'G')) ||
           ((c1 == 'G') && (c2 == 'A')) ||
           ((c1 == 'C') && (c2 == 'U')) ||
           ((c1 == 'U') && (c2 == 'C')))
    score += 0.5 * seqw;
  else
    score -= 0.9 * seqw;

  return score;
}


/*---------------------------------------------------------------------------*/

PRIVATE void
sprint_aligned_bppm(const float *T1,
                    const char  *seq1,
                    const float *T2,
                    const char  *seq2)
{
  int i, length;

  length = alignment[0][0];
  for (i = 0; i < 4; i++) {
    if (aligned_line[i] != NULL)
      free(aligned_line[i]);

    aligned_line[i] = (char *)vrna_alloc((length + 1) * sizeof(char));
  }
  for (i = 1; i <= length; i++) {
    if (alignment[0][i] == 0) {
      aligned_line[0][i - 1] = aligned_line[2][i - 1] = '_';
    } else {
      aligned_line[0][i - 1]  = vrna_bpp_symbol(T1 + alignment[0][i] * 3);
      aligned_line[2][i - 1]  = seq1[alignment[0][i] - 1];
    }

    if (alignment[1][i] == 0) {
      aligned_line[1][i - 1] = aligned_line[3][i - 1] = '_';
    } else {
      aligned_line[1][i - 1]  = vrna_bpp_symbol(T2 + alignment[1][i] * 3);
      aligned_line[3][i - 1]  = seq2[alignment[1][i] - 1];
    }
  }
}


PUBLIC int
set_paln_params(double  gap_open,
                double  gap_ext,
                double  seq_weight,
                int     freeends)
{
  open  = (gap_open > 0) ? -gap_open : gap_open;
  ext   = (gap_ext > 0) ? -gap_ext : gap_ext;
  if (open > ext)
    vrna_log_warning("Gap extension penalty is smaller than "
                         "gap open. Do you realy want this?");

  seqw = seq_weight;
  if (seqw < 0) {
    seqw = 0;
    vrna_log_warning("Sequence weight set to 0 (must be in [0..1])");
  } else
  if (seqw > 1) {
    seqw = 1;
    vrna_log_warning("Sequence weight set to 1 (must be in [0..1])");
  }

  free_ends = (freeends) ? 1 : 0;
  return 0;
}


/*---------------------------------------------------------------------------*/
